#coding=utf-8
import requests,json,urllib
import re
from bs4 import BeautifulSoup
import os,datetime
from TestWeb import mysql_connect
import json
import codecs

def write_result(result, path='D:\\python-photo\\log\\information.txt'):
    """Append one timestamped result line to the information log.

    Args:
        result: text to record (unicode or utf-8-decodable str).
        path: log file location; defaults to the original hard-coded log path.

    Fixes vs. original: the file handle is closed via a context manager
    (the old code leaked it), and the line is written as text through a
    UTF-8 codec instead of concatenating encoded bytes with a str.
    """
    current_time = datetime.datetime.now()
    with codecs.open(path, 'a', encoding='utf-8') as report:
        report.write(u"%s:%s\n" % (current_time, result))

# Persist one scraped record into MySQL.
def insert(image_href,name,subwayName,describe,detail):
    """Insert one scraped row into mysql.CY_project.

    Args map 1:1 onto the columns (image_href, Tourism_name, subwayName,
    Tourism_describe, detail).

    Fixes vs. original: values are bound as query parameters instead of
    being interpolated with %-formatting — scraped text containing a
    quote would previously break the statement (and was injectable) —
    and the cursor is always closed.
    """
    # connect is the first value returned by mysql_connect()
    connect = mysql_connect.mysql_connect()[0]
    cursor = connect.cursor()
    try:
        sql = ("insert into mysql.CY_project"
               "(image_href,Tourism_name,subwayName,Tourism_describe,detail) "
               "values(%s,%s,%s,%s,%s)")
        cursor.execute(sql, (image_href, name, subwayName, describe, detail))
        connect.commit()  # commit the transaction
    finally:
        cursor.close()

# Append one scraped record to the JSON log file.
def write_json(image_href, name, subwayName, describe, detail,
               file_path='D:\\python-photo\\log\\food.json'):
    """Append one record as a JSON object (with trailing ",\\n") to file_path.

    Args:
        image_href, name, subwayName, describe, detail: record fields.
        file_path: output file; defaults to the original hard-coded path.

    Fixes vs. original: removes a second, never-used, never-closed
    open() of the same file (handle leak), removes the redundant
    fp.close() in finally (the with-block already closes, and the old
    finally raised NameError when codecs.open itself failed), and drops
    the needless os.chdir since the path is absolute.
    """
    data = {
        'image_href': image_href,
        'name': name,
        'subwayName': subwayName,
        'describe': describe,
        'detail': detail,
    }
    try:
        with codecs.open(file_path, 'a', encoding='utf-8') as fp:
            fp.write(json.dumps(data, ensure_ascii=False) + ",\n")
    except IOError as err:
        # best-effort logging, matching the original behavior
        print('error' + str(err))

s = requests.session()
# Tourist-attraction ids (used with category_id=26); kept for switching targets.
id = ['990124','990125','990135','990136']   #爬旅游景点的id
# Food-location ids (used with category_id=11) — the set actually scraped below.
food_id = ['990114','990115','990116','990117']  #爬美食地点的id
for n in range(len(food_id)):
    # Fetch one ajax listing page; category_id=26 is tourist attractions, 11 is food.
    index = s.get('http://www.chengdurail.com/searchs/archives.html?ajax=true&category_id=11&tpl_file=foods_ck&linkage_id='+ str(food_id[n]) +'&p=1')
    index.encoding = 'utf-8'
    index_soup = BeautifulSoup(index.text,'html.parser')
    # Parallel lists extracted from the listing markup; index i lines them up.
    image = re.findall(r'<img src="(.+)" alt="" title="">',index.text)
    href = re.findall(r'<div class="img"><a href="(.+)" target="_blank">',index.text)
    name = re.findall(r'target="_blank">(.+)</a><span class="line-color">',index.text)
    subwayName = re.findall(r'<span class="line-color">(.+)</span>',index.text)
    describe = index_soup.find_all('p')
    for i in range(len(image)):
        newimage = 'http://www.chengdurail.com'+image[i]
        newhref = href[i]
        newname = name[i]
        newsubwayName = subwayName[i]
        # Strip the wrapping <p> markup to keep only the description text.
        newdescribe = str(describe[i]).replace('<p class="p">','').replace('</p>','')
        # Follow the per-item detail link and pull the description block's text.
        detail_index = s.get('http://www.chengdurail.com'+newhref)
        detail_index.encoding = 'utf-8'
        soup = BeautifulSoup(detail_index.text, 'html.parser')
        detail = soup.find('div',id='detail_img')
        if detail is None:
            # Detail page missing the expected block (layout change / bad page):
            # skip this item instead of crashing with AttributeError.
            continue
        newdetail = detail.get_text()
        meilcd = [newimage, newname, newsubwayName, newdescribe, newdetail]
        # insert(meilcd[0], meilcd[1], meilcd[2], meilcd[3], meilcd[4])  # enable to also persist into MySQL
        write_json(meilcd[0], meilcd[1], meilcd[2], meilcd[3], meilcd[4])
        # NOTE: the original reassigned `n = len(meilcd)` here, clobbering the
        # outer loop variable; iterate the record directly instead.
        for field in meilcd:
            print(field)

